library(tidyverse)
library(ggplot2)
library(ggsci)
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/mic/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
# BN folders
data_prefix = "mic_m46"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_mic_res/mic_modules/mic_m46/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
Expression data
mod2plot = 46 # Module we want to plot
# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "mic.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy
# Select the expression values for the module of interest
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]
# Save the input expression for the BN
# save(expr_matx_mod, file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))
# write_csv(as.data.frame(expr_matx_mod), file = BN_run_data, col_names = F)
# CINDERellA has two inputs: exp.txt and output_folder
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=1000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
# Results are saved here:
print(BN_output_dir)
Read results
Edges filtered
# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))
edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)
dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input
edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)
edges_filtered = edges_df[abs(edges_df$weight)>0.4, ] # weight default = 0.33
rownames(edges_filtered) = NULL
createDT(edges_filtered %>% arrange(-weight))
Nodes filtered
nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))
nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ]
createDT(nodes_filtered)
BN plot
plot_geneBN(edges_filtered = edges_filtered,
nodes_filtered = nodes_filtered)

Session
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gtools_3.9.4 ggforce_0.3.3 graphlayouts_0.8.0 ggraph_2.0.5
## [5] igraph_2.0.3 ggsci_3.0.3 lubridate_1.9.3 forcats_1.0.0
## [9] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [13] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] ggrepel_0.9.5 Rcpp_1.0.12 digest_0.6.35 utf8_1.2.4
## [5] R6_2.5.1 evaluate_0.23 highr_0.10 pillar_1.9.0
## [9] rlang_1.1.3 rstudioapi_0.16.0 jquerylib_0.1.4 DT_0.30
## [13] rmarkdown_2.26 labeling_0.4.3 htmlwidgets_1.6.4 polyclip_1.10-6
## [17] bit_4.0.5 munsell_0.5.1 compiler_4.1.2 xfun_0.43
## [21] pkgconfig_2.0.3 htmltools_0.5.8.1 tidyselect_1.2.1 gridExtra_2.3
## [25] viridisLite_0.4.2 fansi_1.0.6 crayon_1.5.2 tzdb_0.4.0
## [29] withr_3.0.0 MASS_7.3-54 grid_4.1.2 jsonlite_1.8.8
## [33] gtable_0.3.4 lifecycle_1.0.4 magrittr_2.0.3 scales_1.3.0
## [37] cli_3.6.2 stringi_1.8.3 vroom_1.6.5 cachem_1.0.8
## [41] farver_2.1.1 viridis_0.6.2 bslib_0.7.0 generics_0.1.3
## [45] vctrs_0.6.5 tools_4.1.2 bit64_4.0.5 glue_1.7.0
## [49] tweenr_1.0.2 hms_1.1.3 crosstalk_1.2.1 shadowtext_0.1.1
## [53] parallel_4.1.2 fastmap_1.1.1 yaml_2.3.8 timechange_0.3.0
## [57] colorspace_2.1-0 tidygraph_1.2.0 knitr_1.46 sass_0.4.9